sampleDists <- dist(t(assay(rld)))
plot(hclust(sampleDists))annot = dplyr::select(experimental_metadata, condition)
row.names(annot) = experimental_metadata$sample_id
rld %>%
assay() %>%
cor() %>%
pheatmap(color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
annotation = annot,
annotation_colors = list(condition = c(Control = "#BF3100", OKSM = "#E9B44C",
Senolytic = "#1B998B", Senolytic_OKSM = "#5D576B")),
cluster_rows = TRUE,
cluster_cols = T,
cellwidth = 13,
cellheight = 13)ntop = 500
rv <- rowVars(assay(rld))
select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
pca = prcomp(t(assay(rld)[select,]))
percentVar <- pca$sdev^2/sum(pca$sdev^2)
pca_data <- plotPCA(rld, intgroup = c("condition", "replicate"), returnData=TRUE)
percentVar <- round(100 * attr(pca_data, "percentVar"), digits=2)
ggplot(pca_data, aes(PC1, PC2, color=condition, shape=replicate)) + geom_point(size=3) +
scale_x_continuous(paste0("PC1: ",percentVar[1],"% variance")) +
scale_y_continuous(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_classic() + geom_text(data = pca_data, aes(PC1,PC2, label = name), hjust = 1.2)effective_lengths = matrix(0, ncol=length(experimental_metadata$sample_id), nrow=17714)
colnames(effective_lengths)= experimental_metadata$sample_id
for( i in experimental_metadata$sample_id){
effective_lengths[,i] = read.table(paste("data/ipsc/", i, ".genes.results",sep=""), sep="\t", header=TRUE)$effective_length
}
row.names(effective_lengths) = read.table(paste("data/ipsc/", i, ".genes.results",sep=""), sep="\t", header=TRUE)$gene_id
effective_lengths = rowMeans(effective_lengths[row.names(counts(dds)),])
ncrpk = counts(dds) / (effective_lengths / 1000)
ncrpk = apply(ncrpk, c(1,2), function(x){if(is.nan(x)){0}else{x}})
ncrpk = apply(ncrpk, c(1,2), function(x){if(is.infinite(x)){0}else{x}})
ncscalingfactor = colSums(ncrpk) / 1e6
nctpm = sweep(ncrpk, 2, ncscalingfactor, "/")
nctpm.melt = melt(nctpm)
ggplot(nctpm.melt, aes(x=Var2, y=value)) + geom_boxplot() + theme_classic() + theme(axis.text.x = element_text(angle = 90, colour="black", hjust = 1)) + scale_x_discrete("Sample") + scale_y_continuous("TPM")tpm.threshold = 20000
test.tpm = apply(nctpm, 1, function(x){ any(x> tpm.threshold) })
ensembl.genes[names(test.tpm[test.tpm])] %>%
as.data.frame() %>%
datatable(options = list(scrollX = TRUE))generate_de_section(dds_wald, "OKSM", "Control")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 1258"
generate_de_section(dds_wald, "Senolytic", "Control")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 1399"
generate_de_section(dds_wald, "Senolytic_OKSM", "Control")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 1311"
generate_de_section(dds_wald, "OKSM", "Senolytic")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 124"
generate_de_section(dds_wald, "OKSM", "Senolytic_OKSM")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 109"
generate_de_section(dds_wald, "Senolytic", "Senolytic_OKSM")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 0"
dds_LRT = nbinomLRT(dds, reduced = ~1)
res = results(dds_LRT)
res$gene_biotype= ensembl.genes$gene_biotype[match(row.names(res), ensembl.genes$gene_id)]
res$external_gene_name= ensembl.genes$external_gene_name[match(row.names(res), ensembl.genes$gene_id)]
hist(res$pvalue) Number of significant genes (padj < 0.1):
sum(res$padj < 0.1, na.rm=T)## [1] 2967
# assay(x) to access the count data
sig_mat_rld = assay(significant_rld)
# The apply function swaps the rows to samples and columns to genes -- the standard is the other way around: samples in cols and genes in rows, hence the transpose function
zscores = t(apply(sig_mat_rld, 1, function(x){ (x - mean(x)) / sd(x) }))foo = as(zscores, "matrix")
bar = sapply(1:10, function(x){kmeans(foo, centers=x)$tot.withinss})
plot(bar, type="l")pam_clust <- generate_data(zscores, 7, "pam")
# pam_clust <- as.data.frame(pam_clust)
# pam_clust$Cluster <- factor(pam_clust$Cluster, levels = c(5,4,3,1,2,6))
# pam_clust <- pam_clust[order(pam_clust$Cluster),]
pheatmap(pam_clust[,1:(ncol(pam_clust)-1)],
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
fontsize_row = 5.5,
annotation_col = annotation,
annotation_colors = anno_colours,
cluster_rows = FALSE,
cluster_cols = FALSE)pam_clust_df <- pam_clust %>%
as.data.frame() %>%
mutate(gene_name = ensembl.genes[rownames(.),]$external_gene_name) %>%
dplyr::select(gene_name, Cluster) %>%
dplyr::rename("Gene Name" = gene_name)
datatable(pam_clust_df, options = list(scrollX = TRUE), class = "white-space: nowrap")c1 <- pam_clust_df[pam_clust_df$Cluster == 1, ] %>%
dplyr::select(-Cluster)
saveRDS(c1, "output/ipsc/ipsc_c1.rds")
c2 <- pam_clust_df[pam_clust_df$Cluster == 2, ] %>%
dplyr::select(-Cluster)
saveRDS(c2, "output/ipsc/ipsc_c2.rds")
c3 <- pam_clust_df[pam_clust_df$Cluster == 3, ] %>%
dplyr::select(-Cluster)
saveRDS(c3, "output/ipsc/ipsc_c3.rds")
c4 <- pam_clust_df[pam_clust_df$Cluster == 4, ] %>%
dplyr::select(-Cluster)
saveRDS(c4, "output/ipsc/ipsc_c4.rds")
c5 <- pam_clust_df[pam_clust_df$Cluster == 5, ] %>%
dplyr::select(-Cluster)
saveRDS(c5, "output/ipsc/ipsc_c5.rds")
c6 <- pam_clust_df[pam_clust_df$Cluster == 6, ] %>%
dplyr::select(-Cluster)
saveRDS(c6, "output/ipsc/ipsc_c6.rds")
c7 <- pam_clust_df[pam_clust_df$Cluster == 7, ] %>%
dplyr::select(-Cluster)
saveRDS(c7, "output/ipsc/ipsc_c7.rds")
data.frame(Cluster = c("Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4",
"Cluster 5", "Cluster 6", "Cluster 7", "Total"),
Number_of_genes = c(nrow(c1), nrow(c2), nrow(c3), nrow(c4),
nrow(c5), nrow(c6), nrow(c7),
sum(c(nrow(c1),nrow(c2),nrow(c3),nrow(c4),
nrow(c5),nrow(c6),nrow(c7)))))## Cluster Number_of_genes
## 1 Cluster 1 773
## 2 Cluster 2 397
## 3 Cluster 3 128
## 4 Cluster 4 566
## 5 Cluster 5 613
## 6 Cluster 6 248
## 7 Cluster 7 242
## 8 Total 2967
## Checking the stability of clusters
#fviz_nbclust(zscores, kmeans, method = "silhouette")
set.seed(2)
dd = as.dist((1 - cor(t(zscores)))/2)
pam = pam(dd, 7, diss = TRUE)
sil = silhouette(pam$clustering, dd)
plot(sil, border=NA, main = "Silhouette Plot for 7 Clusters")#listEnrichrSites()
setEnrichrSite("FlyEnrichr")
dbs <- listEnrichrDbs()
to_check <- c("GO_Biological_Process_2018", "KEGG_2019")eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")sessionInfo()## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] enrichR_3.0 DT_0.19
## [3] RColorBrewer_1.1-2 scales_1.1.1
## [5] reshape2_1.4.4 knitr_1.36
## [7] biomaRt_2.50.0 GenomicFeatures_1.46.1
## [9] AnnotationDbi_1.56.2 genefilter_1.76.0
## [11] DESeq2_1.34.0 SummarizedExperiment_1.24.0
## [13] Biobase_2.54.0 MatrixGenerics_1.6.0
## [15] matrixStats_0.61.0 BSgenome.Dmelanogaster.UCSC.dm6_1.4.1
## [17] BSgenome_1.62.0 rtracklayer_1.54.0
## [19] GenomicRanges_1.46.0 Biostrings_2.62.0
## [21] GenomeInfoDb_1.30.0 XVector_0.34.0
## [23] IRanges_2.28.0 S4Vectors_0.32.2
## [25] BiocGenerics_0.40.0 forcats_0.5.1
## [27] stringr_1.4.0 dplyr_1.0.7
## [29] purrr_0.3.4 readr_2.0.2
## [31] tidyr_1.1.4 tibble_3.1.6
## [33] tidyverse_1.3.1 EnhancedVolcano_1.12.0
## [35] ggrepel_0.9.1 ggplot2_3.3.5
## [37] pheatmap_1.0.12 cluster_2.1.2
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.3.0 BiocFileCache_2.2.0
## [4] plyr_1.8.6 splines_4.1.2 crosstalk_1.2.0
## [7] BiocParallel_1.28.0 digest_0.6.28 htmltools_0.5.2
## [10] fansi_0.5.0 magrittr_2.0.1 memoise_2.0.0
## [13] tzdb_0.2.0 annotate_1.72.0 modelr_0.1.8
## [16] extrafont_0.17 extrafontdb_1.0 prettyunits_1.1.1
## [19] colorspace_2.0-2 blob_1.2.2 rvest_1.0.2
## [22] rappdirs_0.3.3 haven_2.4.3 xfun_0.29
## [25] crayon_1.4.2 RCurl_1.98-1.5 jsonlite_1.7.2
## [28] survival_3.2-13 glue_1.5.0 gtable_0.3.0
## [31] zlibbioc_1.40.0 DelayedArray_0.20.0 proj4_1.0-10.1
## [34] car_3.0-12 Rttf2pt1_1.3.9 maps_3.4.0
## [37] abind_1.4-5 DBI_1.1.1 rstatix_0.7.0
## [40] Rcpp_1.0.7 xtable_1.8-4 progress_1.2.2
## [43] bit_4.0.4 htmlwidgets_1.5.4 httr_1.4.2
## [46] ellipsis_0.3.2 farver_2.1.0 pkgconfig_2.0.3
## [49] XML_3.99-0.8 sass_0.4.0 dbplyr_2.1.1
## [52] locfit_1.5-9.4 utf8_1.2.2 labeling_0.4.2
## [55] tidyselect_1.1.1 rlang_0.4.12 munsell_0.5.0
## [58] cellranger_1.1.0 tools_4.1.2 cachem_1.0.6
## [61] cli_3.1.0 generics_0.1.1 RSQLite_2.2.8
## [64] broom_0.7.10 evaluate_0.14 fastmap_1.1.0
## [67] yaml_2.2.1 bit64_4.0.5 fs_1.5.0
## [70] KEGGREST_1.34.0 ash_1.0-15 ggrastr_1.0.1
## [73] xml2_1.3.2 compiler_4.1.2 rstudioapi_0.13
## [76] beeswarm_0.4.0 filelock_1.0.2 curl_4.3.2
## [79] png_0.1-7 ggsignif_0.6.3 reprex_2.0.1
## [82] geneplotter_1.72.0 bslib_0.3.1 stringi_1.7.5
## [85] highr_0.9 ggalt_0.4.0 lattice_0.20-45
## [88] Matrix_1.3-4 vctrs_0.3.8 pillar_1.6.4
## [91] lifecycle_1.0.1 jquerylib_0.1.4 bitops_1.0-7
## [94] R6_2.5.1 BiocIO_1.4.0 KernSmooth_2.23-20
## [97] vipor_0.4.5 MASS_7.3-54 assertthat_0.2.1
## [100] rprojroot_2.0.2 rjson_0.2.20 withr_2.4.2
## [103] GenomicAlignments_1.30.0 Rsamtools_2.10.0 GenomeInfoDbData_1.2.7
## [106] parallel_4.1.2 hms_1.1.1 grid_4.1.2
## [109] rmarkdown_2.11 carData_3.0-4 ggpubr_0.4.0
## [112] lubridate_1.8.0 ggbeeswarm_0.6.0 restfulr_0.0.13